Confirmatory Factor Analysis and Structural Equation Models

Work in Progress

cfa
sem
measurement
# --- Setup ------------------------------------------------------------------
# lavaan: CFA/SEM fitting; lme4: hierarchical models; reticulate: runs the
# embedded Python (PyMC) chunks; modelsummary/tinytable/kableExtra: tables;
# egg: ggarrange; semPlot: path diagrams; reshape2: melt() for heatmaps.
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
# Warm up the Python session so later chunks can assume PyMC is importable.
reticulate::py_run_string("import pymc as pm")

# NOTE(review): machine-specific conda path — must be edited on other hosts.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
# Render all modelsummary tables through tinytable.
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)

# Route python chunks through reticulate; disable scientific notation.
knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)
# Fix the RNG so the noise injected below is reproducible.
set.seed(130)
Warning

This is a work in progress. We intend to elaborate the choices involved in analysing and understanding the data elicited through intentional psychometric surveys.

Measurement and Measurement Constructs

# Load the survey data, then deliberately inflate the third life-satisfaction
# item (ls_p3) for the "west" region with Gaussian noise — this plants the
# regional measurement artefact the later models try to detect.
df = read.csv('sem_data.csv')
inflate <- df$region == 'west'
noise <- rnorm(sum(inflate), 0.5, 1) # generate the noise to add
df$ls_p3[inflate] <- df$ls_p3[inflate] + noise
# Naive aggregate outcomes: sum score and mean score over the three ls items.
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[ c('ls_p1', 'ls_p2', 'ls_p3')])

head(df) |> kable()
ID region gender age se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p2 sup_parents_p3 ls_p1 ls_p2 ls_p3 ls_sum ls_mean
1 west female 13 4.857143 5.571429 4.500000 5.80 5.500000 5.40 6.5 6.5 7.0 7.0 7.0 6.0 5.333333 6.75 7.647112 19.73044 6.576815
2 west male 14 4.571429 4.285714 4.666667 5.00 5.500000 4.80 4.5 4.5 5.5 5.0 6.0 4.5 4.333333 5.00 4.469623 13.80296 4.600985
10 west female 14 4.142857 6.142857 5.333333 5.20 4.666667 6.00 4.0 4.5 3.5 7.0 7.0 6.5 6.333333 5.50 4.710020 16.54335 5.514451
11 west female 14 5.000000 5.428571 4.833333 6.40 5.833333 6.40 7.0 7.0 7.0 7.0 7.0 7.0 4.333333 6.50 5.636198 16.46953 5.489844
12 west female 14 5.166667 5.600000 4.800000 5.25 5.400000 5.25 7.0 7.0 7.0 6.5 6.5 7.0 5.666667 6.00 5.266592 16.93326 5.644419
14 west male 14 4.857143 4.857143 4.166667 5.20 5.000000 4.20 5.5 6.5 7.0 6.5 6.5 6.5 5.000000 5.50 5.913709 16.41371 5.471236

Candidate Structure

#| code-fold: true
#| code-summary: "Show the code"

# Summary-statistics table with the variable rows color-coded by role:
# rows 15:17 = the raw ls_p1..ls_p3 outcome items,
# rows 18:19 = the derived ls_sum / ls_mean aggregates,
# rows  2:14 = the driver indicators (self-efficacy and support parcels).
datasummary_skim(df)|> 
 style_tt(
   i = 15:17,
   j = 1:1,
   background = "#20AACC",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 18:19,
   j = 1:1,
   background = "#2888A0",
   color = "white",
   italic = TRUE) |> 
 style_tt(
   i = 2:14,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftyu5q15n5ibjk84ch05
Unique Missing Pct. Mean SD Min Median Max Histogram
ID 283 0 187.9 106.3 1.0 201.0 367.0
age 5 0 14.7 0.8 13.0 15.0 17.0
se_acad_p1 32 0 5.2 0.8 3.1 5.1 7.0
se_acad_p2 36 0 5.3 0.7 3.1 5.4 7.0
se_acad_p3 29 0 5.2 0.8 2.8 5.2 7.0
se_social_p1 24 0 5.3 0.8 1.8 5.4 7.0
se_social_p2 27 0 5.5 0.7 2.7 5.5 7.0
se_social_p3 31 0 5.4 0.8 3.0 5.5 7.0
sup_friends_p1 13 0 5.8 1.1 1.0 6.0 7.0
sup_friends_p2 10 0 6.0 0.9 2.5 6.0 7.0
sup_friends_p3 13 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p1 11 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p2 11 0 5.9 1.1 2.0 6.0 7.0
sup_parents_p3 13 0 5.7 1.1 1.0 6.0 7.0
ls_p1 15 0 5.2 0.9 2.0 5.3 7.0
ls_p2 21 0 5.8 0.7 2.5 5.8 7.0
ls_p3 161 0 5.5 1.1 1.7 5.6 9.6
ls_sum 218 0 16.5 2.2 8.2 16.7 21.0
ls_mean 217 0 5.5 0.7 2.7 5.6 7.0
N %
region east 142 50.2
west 141 49.8
gender female 132 46.6
male 151 53.4
# The 15 observed indicator columns used throughout: three parcels each for
# academic/social self-efficacy and friend/parent support, plus the ls items.
drivers = c('se_acad_p1', 'se_acad_p2', 'se_acad_p3', 'se_social_p1', 'se_social_p2', 'se_social_p3', 'sup_friends_p1','sup_friends_p2', 'sup_friends_p3', 'sup_parents_p1' , 'sup_parents_p2' , 'sup_parents_p3', 'ls_p1', 'ls_p2', 'ls_p3')



# Render a square matrix (covariance/correlation/residual) as an annotated
# heatmap. `title` and `subtitle` feed ggtitle(); values are rounded to 2dp
# for the cell labels.
plot_heatmap <- function(df, title="Sample Covariances", subtitle="Observed Measures") {
  # Reshape the matrix into long (x, y, value) triples for ggplot.
  long_df <- melt(as.matrix(df))
  names(long_df) <- c("x", "y", "value")
  long_df$value <- round(long_df$value, 2)

  # Diverging palette: blue above zero, white at zero, red below.
  ggplot(long_df, aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(
      high = 'dodgerblue4',
      mid = 'white',
      low = 'firebrick2'
    ) +
    theme(axis.text.x = element_text(angle=45)) +
    ggtitle(title, subtitle)
}

# Observed covariance and correlation heatmaps of the 15 indicators,
# stacked vertically.
g1 = plot_heatmap(cov(df[,  drivers]))

g2 = plot_heatmap(cor(df[,  drivers]), "Sample Correlations")

plot <- ggarrange(g1,g2, ncol=1, nrow=2);

Fit Initial Regression Models

# OLS specifications at three granularities — first parcel only, first two
# parcels, all three parcels — for both the sum-score and mean-score outcome.
formula_sum_1st = " ls_sum ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"
formula_mean_1st = " ls_mean ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"

formula_sum_12 = " ls_sum ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"
formula_mean_12 = " ls_mean ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"


formula_sum = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
# Fit all six OLS models.
mod_sum = lm(formula_sum, df)
mod_sum_1st = lm(formula_sum_1st, df)
mod_sum_12 = lm(formula_sum_12, df)
mod_mean = lm(formula_mean, df)
mod_mean_1st = lm(formula_mean_1st, df)
mod_mean_12 = lm(formula_mean_12, df)

#' Rescale a numeric vector to the [0, 1] range.
#'
#' @param x numeric vector.
#' @param na.rm drop NA values when computing the range? Default FALSE,
#'   preserving the original behavior (any NA makes the whole result NA).
#' @return numeric vector the same length as x. Note: a constant vector
#'   yields NaN (0 / 0) because max(x) equals min(x).
min_max_norm <- function(x, na.rm = FALSE) {
    rng <- range(x, na.rm = na.rm)
    (x - rng[1]) / (rng[2] - rng[1])
}

# Min-max normalize the 15 indicator columns. Select them by name via the
# `drivers` vector (same columns as positions 5:19 in the raw data) so the
# chunk does not silently break if the column order of sem_data.csv changes.
df_norm <- as.data.frame(lapply(df[drivers], min_max_norm))

# Carry the unnormalized outcomes alongside the rescaled predictors.
df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean

# Refit the full-parcel models on the normalized predictors.
mod_sum_norm = lm(formula_sum, df_norm)
mod_mean_norm = lm(formula_mean, df_norm)

# Group the fitted models by outcome so modelsummary's shape="cbind" renders
# the sum-score and mean-score columns side by side.
models = list(
    "Outcome: sum_score" = list("model_sum_1st_factors" = mod_sum_1st,
     "model_sum_1st_2nd_factors" = mod_sum_12,
     "model_sum_score" = mod_sum,
     "model_sum_score_norm" = mod_sum_norm
     ),
    "Outcome: mean_score" = list(
      "model_mean_1st_factors" = mod_mean_1st,
     "model_mean_1st_2nd_factors" = mod_mean_12,
     "model_mean_score"= mod_mean, 
     "model_mean_score_norm" = mod_mean_norm
    )
    )
#| code-fold: true
#| code-summary: "Show the code"

# Coefficient table with the estimate rows (2:25) highlighted in teal.
modelsummary(models, stars=TRUE, shape ="cbind") |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_jb9uvnwer8qg2neciem3
Outcome: sum_score Outcome: mean_score
model_sum_1st_factors model_sum_1st_2nd_factors model_sum_score model_sum_score_norm model_mean_1st_factors model_mean_1st_2nd_factors model_mean_score model_mean_score_norm
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.335*** 4.038*** 3.709*** 9.081*** 2.112*** 1.346*** 1.236*** 3.027***
(0.979) (1.080) (1.071) (0.739) (0.326) (0.360) (0.357) (0.246)
se_acad_p1 0.231 -0.023 -0.068 -0.264 0.077 -0.008 -0.023 -0.088
(0.158) (0.197) (0.216) (0.833) (0.053) (0.066) (0.072) (0.278)
se_social_p1 1.039*** 0.515* 0.401+ 2.085+ 0.346*** 0.172* 0.134+ 0.695+
(0.175) (0.224) (0.224) (1.167) (0.058) (0.075) (0.075) (0.389)
sup_friends_p1 0.069 -0.263+ -0.273 -1.636 0.023 -0.088+ -0.091 -0.545
(0.095) (0.145) (0.169) (1.011) (0.032) (0.048) (0.056) (0.337)
sup_parents_p1 0.518*** 0.196 0.050 0.299 0.173*** 0.065 0.017 0.100
(0.107) (0.155) (0.161) (0.964) (0.036) (0.052) (0.054) (0.321)
se_acad_p2 0.415+ 0.382+ 1.475+ 0.138+ 0.127+ 0.492+
(0.216) (0.227) (0.875) (0.072) (0.076) (0.292)
se_social_p2 0.662** 0.483+ 2.093+ 0.221** 0.161+ 0.698+
(0.233) (0.246) (1.065) (0.078) (0.082) (0.355)
sup_friends_p2 0.358* 0.366* 1.647* 0.119* 0.122* 0.549*
(0.172) (0.180) (0.808) (0.057) (0.060) (0.269)
sup_parents_p2 0.375* 0.166 0.829 0.125* 0.055 0.276
(0.151) (0.162) (0.808) (0.050) (0.054) (0.269)
se_acad_p3 -0.072 -0.302 -0.024 -0.101
(0.196) (0.815) (0.065) (0.272)
se_social_p3 0.350+ 1.400+ 0.117+ 0.467+
(0.180) (0.721) (0.060) (0.240)
sup_friends_p3 0.056 0.334 0.019 0.111
(0.146) (0.878) (0.049) (0.293)
sup_parents_p3 0.452** 2.710** 0.151** 0.903**
(0.141) (0.847) (0.047) (0.282)
Num.Obs. 283 283 283 283 283 283 283 283
R2 0.332 0.389 0.420 0.420 0.332 0.389 0.420 0.420
R2 Adj. 0.323 0.371 0.394 0.394 0.323 0.371 0.394 0.394
AIC 1134.2 1117.0 1110.3 1110.3 512.4 495.2 488.5 488.5
BIC 1156.1 1153.5 1161.3 1161.3 534.2 531.7 539.5 539.5
Log.Lik. -561.090 -548.507 -541.139 -541.139 -250.183 -237.600 -230.232 -230.232
RMSE 1.76 1.68 1.64 1.64 0.59 0.56 0.55 0.55
# Flat model list for a single coefficient-comparison plot across all
# eight specifications.
models = list(
     "model_sum_1st_factors" = mod_sum_1st,
     "model_sum_1st_2nd_factors" = mod_sum_12,
     "model_sum_score" = mod_sum,
     "model_sum_score_norm" = mod_sum_norm,
     "model_mean_1st_factors" = mod_mean_1st,
     "model_mean_1st_2nd_factors" = mod_mean_12,
     "model_mean_score"= mod_mean,
     "model_mean_score_norm" = mod_mean_norm
    )

# Dotted vertical line at zero marks "no effect".
modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted", 
                color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")

Significant Coefficients

# Color coefficients by significance at p < 0.001. The ifelse labels sort
# alphabetically, so "Not significant" -> "grey" and "Significant" -> "black";
# the legend is suppressed on the left panel only.
g1 = modelplot(mod_sum, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")


g2 = modelplot(mod_mean, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Aggregate Driver Scores

# Parcel scores: collapse each construct's three items into their row mean,
# trading item-level detail for a simpler four-predictor regression.
df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])


formula_parcel_mean = "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

formula_parcel_sum = "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

mod_sum_parcel = lm(formula_parcel_sum, df)
mod_mean_parcel = lm(formula_parcel_mean, df)

models_parcel = list("model_sum_score" = mod_sum_parcel,
     "model_mean_score"= mod_mean_parcel
     )

modelsummary(models_parcel, stars=TRUE)
tinytable_or59mjdhizio3l7k66t7
model_sum_score model_mean_score
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 4.378*** 1.459***
(1.036) (0.345)
se_acad_mean 0.252 0.084
(0.176) (0.059)
se_social_mean 1.205*** 0.402***
(0.195) (0.065)
sup_friends_mean 0.049 0.016
(0.108) (0.036)
sup_parents_mean 0.685*** 0.228***
(0.111) (0.037)
Num.Obs. 283 283
R2 0.397 0.397
R2 Adj. 0.388 0.388
AIC 1105.3 483.5
BIC 1127.1 505.3
Log.Lik. -546.638 -235.731
RMSE 1.67 0.56
# Same significance-colored coefficient plots, now for the parcel models.
# Labels sort alphabetically: "Not significant" -> grey, "Significant" -> black.
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")


g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1,g2, ncol=2, nrow=1);

Hierarchical Models

# Full-indicator regressions with a random intercept per region, to absorb
# the regional inflation injected into ls_p3 during data preparation.
formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3  + (1 | region)"

formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"

hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)

# NOTE(review): lmer tidiers do not report p.value by default, so the color
# mapping may be empty here — likely the source of the "Removed 2 rows"
# warnings below; confirm.
g1 = modelplot(hierarchical_mean_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"),  guide = "none")

g2 = modelplot(hierarchical_sum_fit, re.form=NA) +  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))


plot <- ggarrange(g1,g2, ncol=2, nrow=1);
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

# Coefficient table for both hierarchical fits; rows 2:25 (the fixed and
# random-effect estimates) highlighted in teal.
modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
                  "hierarchical_sum_fit"= hierarchical_sum_fit), 
             stars = TRUE) |> 
 style_tt(
   i = 2:25,
   j = 1:1,
   background = "#17C2AD",
   color = "white",
   italic = TRUE)
tinytable_ftl5t6kstw3adyosicbz
hierarchical_mean_fit hierarchical_sum_fit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.903* 2.710*
(0.385) (1.154)
sup_parents_p1 -0.007 -0.021
(0.053) (0.160)
sup_parents_p2 0.070 0.210
(0.053) (0.159)
sup_parents_p3 0.160*** 0.480***
(0.046) (0.139)
sup_friends_p1 -0.091+ -0.274+
(0.055) (0.166)
sup_friends_p2 0.125* 0.376*
(0.059) (0.177)
sup_friends_p3 0.024 0.072
(0.048) (0.144)
se_acad_p1 -0.041 -0.122
(0.071) (0.213)
se_acad_p2 0.131+ 0.393+
(0.074) (0.223)
se_acad_p3 0.039 0.116
(0.067) (0.202)
se_social_p1 0.094 0.283
(0.075) (0.224)
se_social_p2 0.191* 0.574*
(0.081) (0.244)
se_social_p3 0.130* 0.389*
(0.059) (0.178)
SD (Intercept region) 0.160 0.481
SD (Observations) 0.549 1.648
Num.Obs. 283 283
R2 Marg. 0.424 0.424
R2 Cond. 0.469 0.469
AIC 538.4 1131.6
BIC 593.1 1186.3
ICC 0.1 0.1
RMSE 0.54 1.61

Marginal Effects

# Counterfactual predictions over a grid of parental-support values;
# re.form=NA restricts the hierarchical model to its fixed effects.
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))

pred1 |> head() |> kableExtra::kable()
rowid estimate std.error statistic p.value s.value conf.low conf.high se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p3 sup_parents_p2 ls_mean
1 5.232276 0.2673967 19.56747 0 280.8136 4.708188 5.756363 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 1 6.576815
2 5.287553 0.2140733 24.69973 0 445.0319 4.867977 5.707128 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 2 6.576815
3 5.342829 0.1610972 33.16525 0 798.8130 5.027085 5.658574 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 3 6.576815
4 5.398106 0.1089765 49.53461 0 Inf 5.184516 5.611696 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 4 6.576815
5 5.453383 0.0599835 90.91472 0 Inf 5.335818 5.570949 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 5 6.576815
6 5.508660 0.0334480 164.69327 0 Inf 5.443103 5.574217 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 6 6.576815

Regression Marginal Effects

# Marginal-effect curves from the OLS mean-score model: vary one predictor
# across its range while holding the others at representative values.
g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"), 
                      type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Hierarchical Marginal Effects

# Same marginal-effect curves from the hierarchical model, faceted by region;
# re.form=NA plots fixed-effect predictions only.
g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"), 
                      type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);

Confirmatory Factor Analysis

# Five-factor measurement model in lavaan syntax: each latent construct
# (=~) loads on its three observed parcels. lavaan identifies the scale by
# fixing the first loading of each factor to 1 by default.
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3
"

# Same model, but the friend-support loadings are labelled a1..a3 and
# constrained to equality — a testable restriction compared below.
model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

a1 == a2 
a1 == a3
"


fit_mod <- cfa(model_measurement, data = df)
fit_mod_1<- cfa(model_measurement1, data = df)


cfa_models = list("full_measurement_model" = fit_mod, 
     "measurement_model_reduced" = fit_mod_1)
modelplot(cfa_models)

# Global fit indices plus standardized loadings for the full model.
summary(fit_mod, fit.measures = TRUE, standardized = TRUE) 
lavaan 0.6-18 ended normally after 46 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        40

  Number of observations                           283

Model Test User Model:
                                                      
  Test statistic                               207.137
  Degrees of freedom                                80
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2586.651
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.949
  Tucker-Lewis Index (TLI)                       0.933

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -4394.212
  Loglikelihood unrestricted model (H1)      -4290.643
                                                      
  Akaike (AIC)                                8868.423
  Bayesian (BIC)                              9014.241
  Sample-size adjusted Bayesian (SABIC)       8887.401

Root Mean Square Error of Approximation:

  RMSEA                                          0.075
  90 Percent confidence interval - lower         0.062
  90 Percent confidence interval - upper         0.088
  P-value H_0: RMSEA <= 0.050                    0.001
  P-value H_0: RMSEA >= 0.080                    0.263

Standardized Root Mean Square Residual:

  SRMR                                           0.056

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents =~                                                        
    sup_parents_p1    1.000                               0.938    0.876
    sup_parents_p2    1.034    0.056   18.570    0.000    0.970    0.888
    sup_parents_p3    0.988    0.059   16.637    0.000    0.927    0.811
  SUP_Friends =~                                                        
    sup_friends_p1    1.000                               1.021    0.906
    sup_friends_p2    0.793    0.043   18.466    0.000    0.809    0.857
    sup_friends_p3    0.891    0.050   17.735    0.000    0.910    0.831
  SE_Academic =~                                                        
    se_acad_p1        1.000                               0.697    0.880
    se_acad_p2        0.806    0.049   16.278    0.000    0.561    0.819
    se_acad_p3        0.952    0.058   16.491    0.000    0.663    0.828
  SE_Social =~                                                          
    se_social_p1      1.000                               0.640    0.846
    se_social_p2      0.959    0.055   17.338    0.000    0.614    0.881
    se_social_p3      0.926    0.066   13.956    0.000    0.593    0.742
  LS =~                                                                 
    ls_p1             1.000                               0.677    0.729
    ls_p2             0.820    0.078   10.488    0.000    0.555    0.762
    ls_p3             0.744    0.109    6.809    0.000    0.504    0.459

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents ~~                                                        
    SUP_Friends       0.134    0.064    2.094    0.036    0.140    0.140
    SE_Academic       0.219    0.046    4.717    0.000    0.335    0.335
    SE_Social         0.284    0.046    6.239    0.000    0.473    0.473
    LS                0.360    0.056    6.455    0.000    0.567    0.567
  SUP_Friends ~~                                                        
    SE_Academic       0.071    0.047    1.493    0.135    0.100    0.100
    SE_Social         0.195    0.046    4.262    0.000    0.299    0.299
    LS                0.184    0.053    3.500    0.000    0.267    0.267
  SE_Academic ~~                                                        
    SE_Social         0.274    0.036    7.525    0.000    0.613    0.613
    LS                0.212    0.039    5.407    0.000    0.449    0.449
  SE_Social ~~                                                          
    LS                0.330    0.043    7.650    0.000    0.761    0.761

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .sup_parents_p1    0.268    0.037    7.143    0.000    0.268    0.233
   .sup_parents_p2    0.253    0.038    6.595    0.000    0.253    0.212
   .sup_parents_p3    0.446    0.048    9.262    0.000    0.446    0.342
   .sup_friends_p1    0.228    0.040    5.663    0.000    0.228    0.179
   .sup_friends_p2    0.237    0.030    7.926    0.000    0.237    0.266
   .sup_friends_p3    0.371    0.042    8.811    0.000    0.371    0.310
   .se_acad_p1        0.141    0.022    6.487    0.000    0.141    0.226
   .se_acad_p2        0.154    0.018    8.648    0.000    0.154    0.329
   .se_acad_p3        0.202    0.024    8.397    0.000    0.202    0.315
   .se_social_p1      0.163    0.020    8.095    0.000    0.163    0.284
   .se_social_p2      0.109    0.016    6.782    0.000    0.109    0.224
   .se_social_p3      0.287    0.028   10.141    0.000    0.287    0.450
   .ls_p1             0.404    0.048    8.422    0.000    0.404    0.469
   .ls_p2             0.223    0.029    7.624    0.000    0.223    0.420
   .ls_p3             0.951    0.085   11.123    0.000    0.951    0.789
    SUP_Parents       0.880    0.098    8.937    0.000    1.000    1.000
    SUP_Friends       1.042    0.111    9.405    0.000    1.000    1.000
    SE_Academic       0.485    0.054    8.908    0.000    1.000    1.000
    SE_Social         0.410    0.048    8.459    0.000    1.000    1.000
    LS                0.458    0.072    6.323    0.000    1.000    1.000
# Compare the observed covariance matrix against the CFA model-implied one.
g1 <- plot_heatmap(cov(df[, drivers]))
g2 <- plot_heatmap(
  data.frame(fitted(fit_mod)$cov),
  title = "Model Implied Covariances",
  subtitle = "Fitted Values"
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

Modification Indices

# Six largest modification indices above 5: the parameters whose addition
# would most improve the full model's fit.
modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5)
               lhs op            rhs     mi    epc
152 sup_friends_p1 ~~   se_social_p3 19.245  0.095
68     SUP_Friends =~          ls_p2 17.491  0.181
64     SUP_Friends =~   se_social_p1 17.210 -0.136
60     SUP_Friends =~ sup_parents_p3 16.063 -0.187
210          ls_p2 ~~          ls_p3 13.931 -0.140
57     SUP_Parents =~          ls_p3 13.057  0.329

Summary Global Fit Measures

# Side-by-side global fit measures for the full vs equality-constrained model.
summary_df = cbind(fitMeasures(fit_mod, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")),
      fitMeasures(fit_mod_1, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")))
colnames(summary_df) = c('Full Model', 'Reduced Model')

summary_df
                  Full Model Reduced Model
chisq           207.13746909  225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi               0.94876900    0.94230416
aic            8868.42313715 8882.46662079
bic            9014.24101305 9020.99360290
rmsea             0.07493739    0.07854933
srmr              0.05595908    0.05904842
# Path diagrams: raw estimates for the full model, standardized estimates
# for the constrained model.
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)

semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)

Comparing the empirical and implied variance-covariance matrix

# Standardized residual covariances (observed minus model-implied) for both
# CFA models. Reuses the plot_heatmap() helper defined earlier instead of
# duplicating the melt/ggplot pipeline twice — identical output, no drift
# risk between the copies.
g1 = plot_heatmap(
  data.frame(resid(fit_mod, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Full Model"
)

g2 = plot_heatmap(
  data.frame(resid(fit_mod_1, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Reduced Model"
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

# Chi-square test of the full CFA model against the saturated model.
anova(fit_mod)
Chi-Squared Test Statistic (unscaled)

          Df    AIC    BIC  Chisq Chisq diff Df diff         Pr(>Chisq)    
Saturated  0                 0.00                                          
Model     80 8868.4 9014.2 207.14     207.14      80 0.0000000000003209 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-square test of the constrained model against the saturated model.
anova(fit_mod_1)
Chi-Squared Test Statistic (unscaled)

          Df    AIC  BIC  Chisq Chisq diff Df diff           Pr(>Chisq)    
Saturated  0               0.00                                            
Model     82 8882.5 9021 225.18     225.18      82 0.000000000000002776 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio comparison: does forcing the friend-support loadings to
# equality (fit_mod_1) significantly worsen fit relative to the full model?
anova(fit_mod, fit_mod_1)

Chi-Squared Difference Test

          Df    AIC    BIC  Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit_mod   80 8868.4 9014.2 207.14                                          
fit_mod_1 82 8882.5 9021.0 225.18     18.044 0.16836       2  0.0001208 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Structural Equation Models

# Full SEM: the five-factor measurement model plus a structural regression
# of life satisfaction (LS) on the four driver constructs, with a residual
# covariance allowed between the two self-efficacy factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

# Structural model 
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends

# Residual covariances
SE_Academic ~~ SE_Social
"

fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod_sem)

# Standardized path diagram of the structural model.
semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)

# Standardized residual covariances for the SEM. Reuses the plot_heatmap()
# helper defined earlier rather than repeating the melt/ggplot pipeline a
# third time; the top-level call prints the plot as before.
plot_heatmap(
  data.frame(resid(fit_mod_sem, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit"
)

Confirmatory Factor Models with PyMC

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pytensor import tensor as pt
import arviz as az
import networkx as nx



# Whitespace-delimited indicator data for the first CFA example.
# The separator is a regex, so use a raw string: '\s' in a plain string
# is an invalid escape sequence (DeprecationWarning on modern Python).
df_p = pd.read_csv('IIS.dat', sep=r'\s+')
df_p.head() 
     PI    AD   IGC   FI   FC
0  4.00  3.38  4.67  2.6  3.2
1  2.57  3.00  3.50  2.4  2.8
2  2.29  3.29  4.83  2.0  3.4
3  2.43  3.63  4.33  3.6  3.8
4  3.00  4.00  4.83  3.4  3.8
# Labelled coordinates for the PyMC model: two latent factors ('Student'
# measured by PI/AD/IGC, 'Faculty' by FI/FC) over all observed rows.
coords = {'obs': list(range(len(df_p))), 
          'indicators': ['PI', 'AD',    'IGC', 'FI', 'FC'],
          'indicators_1': ['PI', 'AD',  'IGC'],
          'indicators_2': ['FI', 'FC'],
          'latent': ['Student', 'Faculty']
          }


obs_idx = list(range(len(df_p)))
with pm.Model(coords=coords) as model:
  
  # Per-indicator scale parameters; passed below as the sd of the Normal
  # likelihood.
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  # Factor loadings. The first loading of each factor is pinned to 1 via
  # set_subtensor — the standard CFA identification that fixes the scale
  # of each latent variable.
  # NOTE(review): `lambdas_` is rebound for the second factor; this works
  # because each Deterministic captures the tensor before rebinding, but
  # distinct names would be clearer.
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  # Per-indicator intercepts.
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  # Latent factor scores with zero mean and an LKJ-prior covariance,
  # allowing the two factors to correlate.
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=2)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=2, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  # Measurement equations: indicator mean = intercept + loading * factor
  # score (factor 0 for the first three indicators, factor 1 for the rest).
  m1 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m2 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m3 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m4 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m5 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  
  # Stack to (obs, indicators) and condition on the observed data.
  mu = pm.Deterministic('mu', pm.math.stack([m1, m2, m3, m4, m5]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df_p.values)

  # Sample with the numpyro NUTS backend; keep log-likelihood for model
  # comparison and draw posterior predictive samples for checks.
  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95, 
                    idata_kwargs={"log_likelihood": True})
  idata.extend(pm.sample_posterior_predictive(idata))
  
# Summarize key parameters; only two observations' factor scores shown.
summary_df = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi', 'chol_cov_corr'], coords= {'obs': [0, 7]})

PyMC Confirmatory Factor Model

py$summary_df |> kable() |>  kable_classic(full_width = F, html_font = "Cambria")
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[PI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[AD] 0.906 0.061 0.790 1.015 0.004 0.003 312 459 1.02
lambdas1[IGC] 0.540 0.045 0.456 0.626 0.002 0.002 475 660 1.01
lambdas2[FI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[FC] 0.981 0.056 0.877 1.084 0.003 0.002 493 1106 1.01
tau[PI] 3.334 0.037 3.264 3.403 0.002 0.001 545 1185 1.01
tau[AD] 3.899 0.027 3.850 3.949 0.001 0.001 385 963 1.02
tau[IGC] 4.597 0.021 4.557 4.637 0.001 0.001 657 1296 1.01
tau[FI] 3.035 0.040 2.962 3.109 0.002 0.001 470 1177 1.01
tau[FC] 3.714 0.035 3.651 3.781 0.002 0.001 399 1021 1.01
Psi[PI] 0.611 0.024 0.566 0.655 0.001 0.000 1190 2184 1.00
Psi[AD] 0.316 0.020 0.279 0.352 0.001 0.001 645 1451 1.00
Psi[IGC] 0.355 0.013 0.330 0.379 0.000 0.000 2609 2701 1.00
Psi[FI] 0.569 0.026 0.523 0.619 0.001 0.001 992 2041 1.01
Psi[FC] 0.419 0.025 0.372 0.467 0.001 0.001 735 921 1.00
ksi[0, Student] -0.225 0.219 -0.634 0.183 0.003 0.003 4000 2685 1.00
ksi[0, Faculty] -0.369 0.270 -0.866 0.140 0.004 0.004 3743 2991 1.00
ksi[7, Student] 0.886 0.226 0.468 1.307 0.004 0.003 3293 3064 1.00
ksi[7, Faculty] 0.873 0.270 0.365 1.374 0.004 0.003 4753 3043 1.00
chol_cov_corr[0, 0] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
chol_cov_corr[0, 1] 0.847 0.028 0.793 0.898 0.001 0.001 487 776 1.00
chol_cov_corr[1, 0] 0.847 0.028 0.793 0.898 0.001 0.001 487 776 1.00
chol_cov_corr[1, 1] 1.000 0.000 1.000 1.000 0.000 0.000 3772 3708 1.00
az.plot_trace(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi']);
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")

# Reload the survey data and build labelled coordinates for the
# five-factor CFA: each latent factor is measured by three indicators.
df = pd.read_csv('sem_data.csv')

# Single source of truth for the factor -> indicator mapping; the flat
# `drivers` list and the per-factor coords are derived from it.
factor_indicators = {
    'indicators_1': ['se_acad_p1', 'se_acad_p2', 'se_acad_p3'],
    'indicators_2': ['se_social_p1', 'se_social_p2', 'se_social_p3'],
    'indicators_3': ['sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3'],
    'indicators_4': ['sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3'],
    'indicators_5': ['ls_p1', 'ls_p2', 'ls_p3'],
}
drivers = [item for group in factor_indicators.values() for item in group]

latent_names = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
obs_idx = list(range(len(df)))

coords = {
    'obs': list(range(len(df))),
    'indicators': drivers,
    **factor_indicators,
    'latent': latent_names,
    'latent1': list(latent_names),
}
with pm.Model(coords=coords) as model:
  
  # Per-indicator scale parameters; passed below as the sd of the Normal
  # likelihood.
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  # Loadings for each of the five factors. As in the previous model, the
  # first loading per factor is pinned to 1 for identification.
  # NOTE(review): `lambdas_` is rebound five times; each Deterministic
  # captures the tensor before rebinding, so this works, but it is
  # fragile to reordering.
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  lambdas_ = pm.Normal('lambdas_3', 1, 10, dims=('indicators_3'))
  lambdas_3 = pm.Deterministic('lambdas3', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_3'))
  lambdas_ = pm.Normal('lambdas_4', 1, 10, dims=('indicators_4'))
  lambdas_4 = pm.Deterministic('lambdas4', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_4'))
  lambdas_ = pm.Normal('lambdas_5', 1, 10, dims=('indicators_5'))
  lambdas_5 = pm.Deterministic('lambdas5', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_5'))
  # Per-indicator intercepts.
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  # Latent factor scores: zero mean, 5x5 LKJ-prior covariance so all five
  # factors may correlate; `cov` is recorded for reporting.
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=5)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=5, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  cov = pm.Deterministic("cov", chol.dot(chol.T), dims=('latent', 'latent1'))
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  # Measurement equations: three indicators per factor, in the same order
  # as `drivers` (factor 0 -> m0..m2, factor 1 -> m3..m5, etc.).
  m0 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m1 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m2 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m3 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m4 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  m5 = tau[5] + ksi[obs_idx, 1]*lambdas_2[2]
  m6 = tau[6] + ksi[obs_idx, 2]*lambdas_3[0]
  m7 = tau[7] + ksi[obs_idx, 2]*lambdas_3[1]
  m8 = tau[8] + ksi[obs_idx, 2]*lambdas_3[2]
  m9 = tau[9] + ksi[obs_idx, 3]*lambdas_4[0]
  m10 = tau[10] + ksi[obs_idx, 3]*lambdas_4[1]
  m11 = tau[11] + ksi[obs_idx, 3]*lambdas_4[2]
  m12 = tau[12] + ksi[obs_idx, 4]*lambdas_5[0]
  m13 = tau[13] + ksi[obs_idx, 4]*lambdas_5[1]
  m14 = tau[14] + ksi[obs_idx, 4]*lambdas_5[2]
  
  # Stack to (obs, indicators) and condition on the observed item scores.
  mu = pm.Deterministic('mu', pm.math.stack([m0, m1, m2, m3, m4, m5, m6, m7,
                                             m8, m9, m10, m11, m12, m13, m14]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df[drivers].values)

  # Sample with the numpyro NUTS backend; keep log-likelihood for model
  # comparison and draw posterior predictive samples for checks.
  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95, 
                    idata_kwargs={"log_likelihood": True})
  idata.extend(pm.sample_posterior_predictive(idata))
  
# Summary table of the measurement-model parameters only.
summary_df1 = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5', 'tau', 'Psi'])

# Posterior-mean covariance and correlation matrices among the latent
# factors, relabelled for display. az.extract stacks chain/draw into a
# trailing sample dimension, so mean(axis=2) averages over the posterior.
# The label list was previously copy-pasted four times; define it once.
latent_labels = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']

cov_df = pd.DataFrame(az.extract(idata['posterior'])['cov'].mean(axis=2))
cov_df.index = latent_labels
cov_df.columns = latent_labels

correlation_df = pd.DataFrame(az.extract(idata['posterior'])['chol_cov_corr'].mean(axis=2))
correlation_df.index = latent_labels
correlation_df.columns = latent_labels

Life Satisfaction Model

py$summary_df1 |> kable() |>  kable_classic(full_width = F, html_font = "Cambria")
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[se_acad_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[se_acad_p2] 0.818 0.053 0.720 0.920 0.002 0.001 850 1743 1.00
lambdas1[se_acad_p3] 0.969 0.063 0.846 1.080 0.002 0.002 828 1559 1.00
lambdas2[se_social_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[se_social_p2] 0.958 0.058 0.854 1.068 0.003 0.002 552 1537 1.00
lambdas2[se_social_p3] 0.934 0.074 0.799 1.074 0.003 0.002 779 1613 1.00
lambdas3[sup_friends_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas3[sup_friends_p2] 0.803 0.044 0.723 0.888 0.001 0.001 1069 1855 1.00
lambdas3[sup_friends_p3] 0.907 0.052 0.813 1.006 0.001 0.001 1223 2101 1.00
lambdas4[sup_parents_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas4[sup_parents_p2] 1.040 0.058 0.931 1.142 0.002 0.002 683 1536 1.00
lambdas4[sup_parents_p3] 1.009 0.064 0.882 1.124 0.002 0.002 834 1858 1.00
lambdas5[ls_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas5[ls_p2] 0.789 0.080 0.639 0.936 0.003 0.002 521 1039 1.01
lambdas5[ls_p3] 0.980 0.098 0.788 1.158 0.005 0.003 465 1208 1.01
tau[se_acad_p1] 5.155 0.047 5.068 5.243 0.002 0.002 372 741 1.01
tau[se_acad_p2] 5.346 0.041 5.273 5.426 0.002 0.001 413 940 1.01
tau[se_acad_p3] 5.211 0.048 5.121 5.300 0.002 0.002 432 1077 1.01
tau[se_social_p1] 5.291 0.046 5.203 5.372 0.002 0.002 338 796 1.01
tau[se_social_p2] 5.478 0.042 5.401 5.555 0.002 0.002 325 806 1.01
tau[se_social_p3] 5.441 0.048 5.342 5.525 0.002 0.002 415 1159 1.01
tau[sup_friends_p1] 5.786 0.069 5.659 5.913 0.004 0.003 276 702 1.01
tau[sup_friends_p2] 6.010 0.058 5.900 6.118 0.003 0.002 293 750 1.01
tau[sup_friends_p3] 5.990 0.067 5.857 6.108 0.004 0.003 327 782 1.01
tau[sup_parents_p1] 5.977 0.063 5.858 6.095 0.003 0.002 411 765 1.01
tau[sup_parents_p2] 5.930 0.064 5.800 6.045 0.003 0.002 385 772 1.01
tau[sup_parents_p3] 5.721 0.067 5.597 5.848 0.003 0.002 458 995 1.01
tau[ls_p1] 5.192 0.055 5.090 5.295 0.002 0.002 484 1322 1.01
tau[ls_p2] 5.777 0.043 5.691 5.854 0.002 0.001 477 1127 1.01
tau[ls_p3] 5.222 0.053 5.122 5.320 0.003 0.002 447 1078 1.01
Psi[se_acad_p1] 0.412 0.028 0.359 0.465 0.001 0.001 1254 2135 1.00
Psi[se_acad_p2] 0.413 0.024 0.369 0.458 0.001 0.000 1717 2815 1.00
Psi[se_acad_p3] 0.467 0.028 0.415 0.519 0.001 0.001 1339 1951 1.00
Psi[se_social_p1] 0.428 0.026 0.380 0.477 0.001 0.001 1114 2009 1.00
Psi[se_social_p2] 0.363 0.024 0.319 0.410 0.001 0.000 1387 2450 1.00
Psi[se_social_p3] 0.555 0.028 0.502 0.606 0.001 0.000 2842 2527 1.00
Psi[sup_friends_p1] 0.517 0.039 0.441 0.589 0.001 0.001 1008 1876 1.01
Psi[sup_friends_p2] 0.507 0.031 0.449 0.566 0.001 0.001 1676 2521 1.00
Psi[sup_friends_p3] 0.623 0.037 0.554 0.691 0.001 0.001 1978 2221 1.00
Psi[sup_parents_p1] 0.549 0.036 0.483 0.616 0.001 0.001 1340 1896 1.00
Psi[sup_parents_p2] 0.535 0.037 0.467 0.605 0.001 0.001 1168 1868 1.00
Psi[sup_parents_p3] 0.675 0.038 0.605 0.748 0.001 0.001 1955 2473 1.00
Psi[ls_p1] 0.670 0.038 0.603 0.743 0.001 0.001 1037 1661 1.00
Psi[ls_p2] 0.532 0.030 0.477 0.589 0.001 0.001 1376 1929 1.00
Psi[ls_p3] 0.624 0.037 0.554 0.692 0.001 0.001 1468 1738 1.00

# Forest plot of all factor loadings; combined=True pools the chains into
# a single interval per parameter.
fig, ax = plt.subplots(figsize=(15, 8))
az.plot_forest(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5'], combined=True, ax=ax);
ax.set_title("Factor Loadings for each of the Five Factors");

py$cov_df |> kable(caption= "Covariances Amongst Latent Factors",digits=2) |> kable_styling() %>% kable_classic(full_width = F, html_font = "Cambria") |> row_spec(5, color = "red")
Covariances Amongst Latent Factors
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 0.47 0.26 0.06 0.21 0.22
SE_SOCIAL 0.26 0.40 0.19 0.27 0.31
SUP_F 0.06 0.19 1.04 0.12 0.17
SUP_P 0.21 0.27 0.12 0.86 0.39
LS 0.22 0.31 0.17 0.39 0.43
py$correlation_df |> kable( caption= "Correlations Amongst Latent Factors", digits=2) |> kable_styling() |>  kable_classic(full_width = F, html_font = "Cambria") |> row_spec(5, color = "red")
Correlations Amongst Latent Factors
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 1.00 0.60 0.09 0.32 0.50
SE_SOCIAL 0.60 1.00 0.29 0.46 0.75
SUP_F 0.09 0.29 1.00 0.13 0.25
SUP_P 0.32 0.46 0.13 1.00 0.64
LS 0.50 0.75 0.25 0.64 1.00

# Posterior predictive checks: for each of the 15 indicators, overlay the
# observed score histogram with the posterior-predictive mean scores.
fig, axs = plt.subplots(5, 3, figsize=(20, 20))
for i, (ax, driver) in enumerate(zip(axs.flatten(), drivers)):
    predicted = idata['posterior_predictive'].sel({'likelihood_dim_3': i}).mean(dim=('chain', 'draw'))
    ax.hist(df[driver], alpha=0.3, ec='black', bins=20, label='Observed Scores')
    ax.hist(predicted['likelihood'], color='purple', alpha=0.6, bins=20, label='Predicted Scores')
    ax.set_title(f"Posterior Predictive Checks {driver}")
    ax.legend()

# Model-implied covariance matrix versus the observed covariance matrix.
# NOTE(review): `[:, :, 0]` selects only the first stacked posterior
# predictive sample, so `model_cov` reflects a single draw rather than
# the posterior mean — confirm whether averaging over draws was intended.
model_cov = pd.DataFrame(az.extract(idata['posterior_predictive'])['likelihood'][:, :, 0]).cov()
obs_cov = df[drivers].cov()
# Align labels so the element-wise difference lines up by indicator.
model_cov.index = obs_cov.index
model_cov.columns = obs_cov.columns
residuals = model_cov - obs_cov
plot_heatmap(py$residuals, "Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Bayesian Model fit")

Citation

BibTeX citation:
@online{forde,
  author = {Nathaniel Forde},
  title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
    {Models}},
  langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural Equation Models.”